* regression of proximate determinants on child survival

// ---- Assemble the country-year panel ----
// All input datasets are read from the estimation/data directory.
cd "~/Documents/econ/research/current/cognitive/estimation/data"

// Clear cached programs and put the personal outreg directory at the front
// of the ado search path.
discard
adopath ++ ~/Documents/econ/research/stata/ado/personal/outreg

// WDI is the master file. malaria and lead are joined one-to-one on
// country-year with no keep() restriction, so unmatched rows from either
// side stay in memory.
use WDI, clear
foreach panel in malaria lead {
	merge 1:1 countrycode year using `panel', nogenerate
}

// The remaining merges only add columns: rows found solely in the using
// file are discarded by keep(match master).
merge m:1 countrycode using tropics, keep(match master) nogenerate
merge 1:1 countrycode year using ICRG_panel, keep(match master) nogenerate
// merge 1:1 countrycode year using stunted, nogen // no more than 40 obs/yr
compress

// Work from the do-file directory for the rest of the script; the relative
// output paths used by the table commands resolve from here.
cd "~/Documents/econ/research/current/cognitive/estimation/dofiles"

// Survival rates: 1000 minus the corresponding mortality rate, rounded to
// one decimal place.
generate isr = round(1000 - imr, .1)
generate csr = round(1000 - cmr, .1)

// NOTE(review): the division by 10 presumably converts the source unit to
// the per-million unit named in the label; confirm against the malaria data.
replace malaria = malaria/10
label variable malaria "Malaria cases per million people"

// Declare the panel structure (numeric country id by year) so that
// time-series operators such as L10. can be used.
encode countrycode, generate(isonum)
xtset isonum year

// The ten-period lag is built BEFORE the sample is cut at 2000, so that
// pre-2000 GDP values are still available to feed the lag.
generate lgdplag = log(L10.gdpPPPWDI)
generate lgdp = log(gdpPPPWDI)
drop if year < 2000

// Variable labels double as row titles in the regression tables (the table
// commands below use the varlabel option and write TeX), hence the
// LaTeX-escaped percent signs and math-mode "<".

// household infrastructure
label variable sanitation "Sanitation (\% of households)"
label variable water "Safe water (\% of households)"

// nutrition
label variable anemia "Anemia (\% of children \(<\) 5)"
label variable undernut "Undernutrition (\% of pop.)"

// immunization coverage
label variable bcg "Immunization, TB (\% of children \(<\) 2)"
label variable dpt "Immunization, DPT (\% of children \(<\) 2)"
label variable measles "Immunization, Measles (\% of children \(<\) 2)"

// income
label variable lgdplag "log GDP per capita, lagged 10 yrs"

// misschk hiv bcg sanitation water anemia malaria dpt measles undernut lgdppc csr, gen(miss)

// Serial-correlation check on the full specification (including lagged GDP).
xtserial csr anemia water sanitation dpt undernut measles bcg lgdplag
// no sign of AR(1)
// variables left out: breastfeeding, malaria

// Formatting options shared by every column of the main regression table.
local oregopts `"se varlabel squarebrack sigsymbols(+,*,**,***) starlevels(10 5 1 0.1) noautosumm nodisplay"'

// ------ baseline: proximate determinants only, country fixed effects ------
xtreg csr anemia water sanitation dpt undernut measles bcg, fe
// xt_r2total is expected to leave e(r2_t) behind; the table footers read it.
xt_r2total

// Column 1 of the main table: coefficients with standard errors.
outreg, `oregopts' ctitle("" "Fixed" \"" "Effects") ///
	addrow("N", "`: display %5.0fc e(N)'" \ ///
		"Countries", "`:display %3.0f e(N_g)'" \ ///
		"\(R^2 ({Corr(\hat{y},y)}^2)\)", "`:display %4.2f e(r2_t)'")

// Compact coefficients-only version of the same column, stored under the
// name "keynote" so a separate table can be built from it later.
outreg, store(keynote) stat(b) varlabel nodisplay noautosumm ///
	ctitle("" "Fixed" \"" "Effects") ///
	starloc(1) sigsymbols(+,*,**) starlevels(10 5 1) ///
	addrow("N", "`: display %5.0fc e(N)'" ///
		\ "Countries", "`:display %3.0f e(N_g)'" ///
		\ "\(R^2 ({Corr(\hat{y},y)}^2)\)", "`:display %4.2f e(r2_t)'")

// Shapley decomposition of the fixed-effects R-squared over the seven
// proximate determinants of the baseline model.
shapleyx anemia water sanitation dpt undernut measles bcg, ///
	percent result(e(r2)): xtreg csr @, fe

// Convert the decomposition into percentage shares: take column 2 of
// r(decompos), divide every row but the last by the last row, times 100.
// NOTE(review): this presumes the last row holds the total; confirm against
// the shapleyx documentation.
mata:
s = st_matrix("r(decompos)")[,2]
s_pct = s[1..rows(s)-1]/s[rows(s)]*100
st_matrix("shap", s_pct)
st_matrix("shap_cum", (s_pct, runningsum(s_pct)))
end

// Name the rows after the decomposed variables so the table commands can
// line the shares up with the regression rows.
matrix rownames shap = `r(names)'
matrix rownames shap_cum = `r(names)'

// Append the shares as a new column of the main table (kept in memory) ...
frmttable, statmat(shap) merge nodisplay varlabel sdec(1) ///
	ctitle("" "Shapley" \ "" "\% of \(R^2\)")
// ... and of the stored "keynote" table, which is written out as a TeX
// fragment.
frmttable using ../../draft/images/csr_prox_kn, statmat(shap) merge(keynote) ///
	tex frag varlabel sdec(1) ///
	ctitle("" "Shapley" \ "" "\% of \(R^2\)")

// Shares next to their running total, for inspection in the log.
matrix list shap_cum

// ------ now include GDP -------
// Same fixed-effects specification with ten-year-lagged log GDP added.
xtreg csr anemia water sanitation dpt undernut measles bcg lgdplag, fe
// xt_r2total is expected to leave e(r2_t) behind; the footer reads it.
xt_r2total

// Merge this model into the main table as a further column, using the
// shared formatting options.
outreg, merge `oregopts' ctitle("" "Including" \"" "GDP") ///
	addrow("N", "`: display %5.0fc e(N)'" \ ///
		"Countries", "`:display %3.0f e(N_g)'" \ ///
		"\(R^2 ({Corr(\hat{y},y)}^2)\)", "`:display %4.2f e(r2_t)'")

// Shapley decomposition of the fixed-effects R-squared for the model that
// includes lagged GDP. The variable list must be the same as in the xtreg
// above: sanitation was previously missing here, so the shares described a
// different model from the one reported in the table and the sanitation row
// had no Shapley entry.
shapleyx anemia water sanitation dpt undernut measles bcg lgdplag,  ///
	percent result(e(r2)): xtreg csr @, fe

// Percentage shares: column 2 of r(decompos), every row but the last divided
// by the last row, times 100.
// NOTE(review): this presumes the last row holds the total; confirm against
// the shapleyx documentation.
mata: s = st_matrix("r(decompos)")[,2]; ///
	s_pct = s[1..rows(s)-1]/s[rows(s)]*100; ///
	st_matrix("shap", s_pct); ///
	st_matrix("shap_cum", (s_pct, runningsum(s_pct)))

// Row names let frmttable align the shares with the regression rows.
matrix rownames shap = `r(names)'
matrix rownames shap_cum = `r(names)'

// Append the shares as the last column of the main table and write the
// finished table out as a TeX fragment.
frmttable using ../../draft/images/csr_prox, statmat(shap) merge ///
	tex frag varlabel squarebrack sdec(1) ctitle("" "Shapley" \ ///
		"" "\% of \(R^2\)")

// Shares next to their running total, for inspection in the log.
mat l shap_cum
		
// n.b. air pollution measures didn't work - only one year of data 
//   and airpoll is negatively correlated

// calculate average fixed effects for tropical vs. nontropical countries
//	result - even bigger than the difference between the two without any
//	covariates!
//
// The country effects are estimated as dummy coefficients (i.isonum), each
// measured relative to the omitted base country, whose effect is 0 by
// construction. The group averages below therefore
//   - divide by the FULL number of countries in each group (the base country
//     contributes a zero to its group's sum, but still counts in the mean);
//   - skip the base level whichever group it belongs to, instead of assuming
//     it is the first nontropical country;
//   - treat a missing tropics value as "unknown" rather than "tropical"
//     (a bare "if tropics" is true for missing values).
qui reg csr anemia water sanitation lgdplag dpt undernut measles bcg i.isonum

// Read the omitted base level straight from the coefficient names
// (it appears as "#b.isonum" in e(b)).
local base = .
local bnames : colfullnames e(b)
foreach nm of local bnames {
	if regexm("`nm'", "^([0-9]+)b[.]isonum") local base = regexs(1)
}

keep if e(sample)
qui tab isonum if tropics & !missing(tropics), nolabel matrow(trop)
qui tab isonum if !tropics, nolabel matrow(nontrop)

// Build the linear combination
//   mean(tropical effects) - mean(nontropical effects)
// Start from an empty list so that re-running this section does not append
// to a stale expression.
local testlist
local Nnt = rowsof(nontrop)
forvalues r = 1/`Nnt' {
	local lvl = nontrop[`r',1]
	if `lvl' != `base' {
		local testlist "`testlist'-`lvl'.isonum/`Nnt' "
	}
}
local Nt = rowsof(trop)
forvalues r = 1/`Nt' {
	local lvl = trop[`r',1]
	if `lvl' != `base' {
		local testlist "`testlist' + `lvl'.isonum/`Nt'"
	}
}
lincom `testlist'
scalar trop_eff = r(estimate)

// Raw gap in mean child survival between the two groups of countries.
sum csr if !tropics
scalar csr_nt = r(mean)
sum csr if tropics & !missing(tropics)
scalar csr_diff = csr_nt - r(mean)

// NOTE(review): trop_eff is (tropical - nontropical) while csr_diff is
// (nontropical - tropical), so the percentage prints with a negative sign
// when both gaps point the same way. Left as in the original; confirm the
// intended sign convention.
di "Average tropical country effect as % of CSR gap:"
di trop_eff/csr_diff*100
